In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import mlxtend
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import seaborn as sns
In [2]:
df = pd.read_csv("D:\\Documents\\BSCS 7D\\Data science\\data.csv", encoding='latin-1')
In [3]:
df.head()
Out[3]:
InvoiceNo StockCode Description Quantity InvoiceDate UnitPrice CustomerID Country
0 536365 85123A WHITE HANGING HEART T-LIGHT HOLDER 6 12/1/2010 8:26 2.55 17850.0 United Kingdom
1 536365 71053 WHITE METAL LANTERN 6 12/1/2010 8:26 3.39 17850.0 United Kingdom
2 536365 84406B CREAM CUPID HEARTS COAT HANGER 8 12/1/2010 8:26 2.75 17850.0 United Kingdom
3 536365 84029G KNITTED UNION FLAG HOT WATER BOTTLE 6 12/1/2010 8:26 3.39 17850.0 United Kingdom
4 536365 84029E RED WOOLLY HOTTIE WHITE HEART. 6 12/1/2010 8:26 3.39 17850.0 United Kingdom
In [4]:
df.shape
Out[4]:
(541909, 8)

Data Cleaning

In [5]:
def data_cleaning(data):
    #we removed spaces before adn after decription
    data['Description'] = data['Description'].str.strip()
    #duplicate invoice remove kardi
    data.dropna(axis=0, subset=['InvoiceNo'], inplace=True)
    #invoiceNo type convert to string
    data['InvoiceNo'] = data['InvoiceNo'].astype('str')
    #cancelled transaction removed (C mean invoice wasnt generated)
    data = data[~data['InvoiceNo'].str.contains('C')]
    #Drop extra columns
    data = data.drop(columns=['StockCode', 'InvoiceDate', 'UnitPrice', 'CustomerID'])
    return data

df = data_cleaning(df)
In [6]:
df.shape
Out[6]:
(532621, 4)
In [7]:
df.isna().sum()
Out[7]:
InvoiceNo         0
Description    1454
Quantity          0
Country           0
dtype: int64
In [8]:
#removed null
df = df.dropna()
In [9]:
#updated indexes
df = df.reset_index()   
df.drop(["index"], axis=1, inplace=True)
In [10]:
df.head()
Out[10]:
InvoiceNo Description Quantity Country
0 536365 WHITE HANGING HEART T-LIGHT HOLDER 6 United Kingdom
1 536365 WHITE METAL LANTERN 6 United Kingdom
2 536365 CREAM CUPID HEARTS COAT HANGER 8 United Kingdom
3 536365 KNITTED UNION FLAG HOT WATER BOTTLE 6 United Kingdom
4 536365 RED WOOLLY HOTTIE WHITE HEART. 6 United Kingdom
In [11]:
df.dtypes
Out[11]:
InvoiceNo      object
Description    object
Quantity        int64
Country        object
dtype: object
In [12]:
df.describe()
Out[12]:
Quantity
count 531167.000000
mean 10.293623
std 159.301358
min -9600.000000
25% 1.000000
50% 3.000000
75% 10.000000
max 80995.000000
In [13]:
df.shape
Out[13]:
(531167, 4)
In [14]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 531167 entries, 0 to 531166
Data columns (total 4 columns):
 #   Column       Non-Null Count   Dtype 
---  ------       --------------   ----- 
 0   InvoiceNo    531167 non-null  object
 1   Description  531167 non-null  object
 2   Quantity     531167 non-null  int64 
 3   Country      531167 non-null  object
dtypes: int64(1), object(3)
memory usage: 16.2+ MB
In [15]:
df['Country'].value_counts()
Out[15]:
Country
United Kingdom          486168
Germany                   9042
France                    8408
EIRE                      7894
Spain                     2485
Netherlands               2363
Belgium                   2031
Switzerland               1967
Portugal                  1501
Australia                 1185
Norway                    1072
Italy                      758
Channel Islands            748
Finland                    685
Cyprus                     614
Sweden                     451
Unspecified                446
Austria                    398
Denmark                    380
Poland                     330
Japan                      321
Israel                     295
Hong Kong                  284
Singapore                  222
Iceland                    182
USA                        179
Canada                     151
Greece                     145
Malta                      112
United Arab Emirates        68
European Community          60
RSA                         58
Lebanon                     45
Lithuania                   35
Brazil                      32
Czech Republic              25
Bahrain                     18
Saudi Arabia                 9
Name: count, dtype: int64
In [16]:
top3 = df["Country"].value_counts().head(3)

plt.figure(figsize=[10, 6])
top3.plot(kind='bar', color='skyblue')
plt.xlabel('Country')
plt.ylabel('Number of Transactions')
plt.title('Top 10 Countries by Number of Transactions')
plt.xticks(rotation=45, ha='right')  
plt.tight_layout()
plt.show()
In [17]:
most_sold = df['Description'].value_counts().head(10)

plt.figure(figsize=[10, 6])
most_sold.plot(kind='bar', color='skyblue')
plt.xlabel('Items')
plt.ylabel('Number of Items sold')
plt.title('Top 10 Items Sold')
plt.xticks(rotation=45, ha='right')  
plt.tight_layout()
plt.show()

Data Training

In [18]:
def pivot_column(df, country):
    basket = (df[df['Country'] == country]
              .groupby(['InvoiceNo', 'Description'])['Quantity']
              .sum().unstack().reset_index().fillna(0)
              .set_index('InvoiceNo'))

    def encode_data(x):
        if x <= 0:
            return 0
        if x >= 1:
            return 1

    basket = basket.applymap(encode_data)
    basket.drop('POSTAGE', inplace=True, axis=1)
    basket = basket[(basket > 0).sum(axis=1) >= 2]
    return basket


uk_basket = pivot_column(df, 'United Kingdom')
germany_basket = pivot_column(df, 'Germany')
france_basket = pivot_column(df, 'France')


uk_frequent_itemsets = apriori(uk_basket, min_support=0.03, use_colnames=True)
germany_frequent_itemsets = apriori(germany_basket, min_support=0.03, use_colnames=True)
france_frequent_itemsets = apriori(france_basket, min_support=0.03, use_colnames=True)


uk_rules = association_rules(uk_frequent_itemsets, metric='lift', min_threshold=1).sort_values("lift", ascending=False).reset_index(drop=True)
germany_rules = association_rules(germany_frequent_itemsets, metric='lift', min_threshold=1).sort_values("lift", ascending=False).reset_index(drop=True)
france_rules = association_rules(france_frequent_itemsets, metric='lift', min_threshold=1).sort_values("lift", ascending=False).reset_index(drop=True)
D:\Anaconda\Lib\site-packages\mlxtend\frequent_patterns\fpcommon.py:110: DeprecationWarning: DataFrames with non-bool types result in worse computationalperformance and their support might be discontinued in the future.Please use a DataFrame with bool type
  warnings.warn(
D:\Anaconda\Lib\site-packages\mlxtend\frequent_patterns\fpcommon.py:110: DeprecationWarning: DataFrames with non-bool types result in worse computationalperformance and their support might be discontinued in the future.Please use a DataFrame with bool type
  warnings.warn(
D:\Anaconda\Lib\site-packages\mlxtend\frequent_patterns\fpcommon.py:110: DeprecationWarning: DataFrames with non-bool types result in worse computationalperformance and their support might be discontinued in the future.Please use a DataFrame with bool type
  warnings.warn(

UK Basket

UK Basket Generated Rules

In [19]:
uk_rules.head(100)
Out[19]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction zhangs_metric
0 (PINK REGENCY TEACUP AND SAUCER) (GREEN REGENCY TEACUP AND SAUCER) 0.042284 0.056500 0.034904 0.825465 14.610023 0.032515 5.405792 0.972683
1 (GREEN REGENCY TEACUP AND SAUCER) (PINK REGENCY TEACUP AND SAUCER) 0.056500 0.042284 0.034904 0.617773 14.610023 0.032515 2.505621 0.987338
2 (ROSES REGENCY TEACUP AND SAUCER) (PINK REGENCY TEACUP AND SAUCER) 0.057710 0.042284 0.033029 0.572327 13.535248 0.030589 2.239365 0.982838
3 (PINK REGENCY TEACUP AND SAUCER) (ROSES REGENCY TEACUP AND SAUCER) 0.042284 0.057710 0.033029 0.781116 13.535248 0.030589 4.304973 0.967008
4 (GARDENERS KNEELING PAD CUP OF TEA) (GARDENERS KNEELING PAD KEEP CALM) 0.045309 0.054262 0.032726 0.722296 13.311351 0.030268 3.405567 0.968770
5 (GARDENERS KNEELING PAD KEEP CALM) (GARDENERS KNEELING PAD CUP OF TEA) 0.054262 0.045309 0.032726 0.603122 13.311351 0.030268 2.405500 0.977941
6 (ROSES REGENCY TEACUP AND SAUCER) (GREEN REGENCY TEACUP AND SAUCER) 0.057710 0.056500 0.042405 0.734801 13.005345 0.039145 3.557704 0.979644
7 (GREEN REGENCY TEACUP AND SAUCER) (ROSES REGENCY TEACUP AND SAUCER) 0.056500 0.057710 0.042405 0.750535 13.005345 0.039145 3.777249 0.978387
8 (ALARM CLOCK BAKELIKE RED) (ALARM CLOCK BAKELIKE GREEN) 0.056197 0.052689 0.034057 0.606028 11.502008 0.031096 2.404514 0.967425
9 (ALARM CLOCK BAKELIKE GREEN) (ALARM CLOCK BAKELIKE RED) 0.052689 0.056197 0.034057 0.646383 11.502008 0.031096 2.669000 0.963842
10 (PAPER CHAIN KIT 50'S CHRISTMAS) (PAPER CHAIN KIT VINTAGE CHRISTMAS) 0.067631 0.048152 0.032545 0.481216 9.993705 0.029288 1.834769 0.965215
11 (PAPER CHAIN KIT VINTAGE CHRISTMAS) (PAPER CHAIN KIT 50'S CHRISTMAS) 0.048152 0.067631 0.032545 0.675879 9.993705 0.029288 2.876613 0.945463
12 (WOODEN PICTURE FRAME WHITE FINISH) (WOODEN FRAME ANTIQUE WHITE) 0.064969 0.056863 0.031940 0.491620 8.645715 0.028246 1.855182 0.945782
13 (WOODEN FRAME ANTIQUE WHITE) (WOODEN PICTURE FRAME WHITE FINISH) 0.056863 0.064969 0.031940 0.561702 8.645715 0.028246 2.133324 0.937653
14 (LUNCH BAG PINK POLKADOT) (LUNCH BAG BLACK SKULL.) 0.060795 0.073438 0.031577 0.519403 7.072694 0.027112 1.927940 0.914189
15 (LUNCH BAG BLACK SKULL.) (LUNCH BAG PINK POLKADOT) 0.073438 0.060795 0.031577 0.429984 7.072694 0.027112 1.647681 0.926663
16 (LUNCH BAG BLACK SKULL.) (LUNCH BAG SUKI DESIGN) 0.073438 0.061763 0.030186 0.411038 6.655110 0.025650 1.593035 0.917088
17 (LUNCH BAG SUKI DESIGN) (LUNCH BAG BLACK SKULL.) 0.061763 0.073438 0.030186 0.488737 6.655110 0.025650 1.812299 0.905677
18 (JUMBO STORAGE BAG SUKI) (JUMBO SHOPPER VINTAGE RED PAISLEY) 0.068356 0.068477 0.031033 0.453982 6.629666 0.026352 1.706030 0.911467
19 (JUMBO SHOPPER VINTAGE RED PAISLEY) (JUMBO STORAGE BAG SUKI) 0.068477 0.068356 0.031033 0.453180 6.629666 0.026352 1.703749 0.911586
20 (LUNCH BAG PINK POLKADOT) (LUNCH BAG RED RETROSPOT) 0.060795 0.084205 0.033513 0.551244 6.546416 0.028394 2.040740 0.902087
21 (LUNCH BAG RED RETROSPOT) (LUNCH BAG PINK POLKADOT) 0.084205 0.060795 0.033513 0.397989 6.546416 0.028394 1.560112 0.925147
22 (LUNCH BAG BLACK SKULL.) (LUNCH BAG CARS BLUE) 0.073438 0.064364 0.030428 0.414333 6.437345 0.025701 1.597556 0.911603
23 (LUNCH BAG CARS BLUE) (LUNCH BAG BLACK SKULL.) 0.064364 0.073438 0.030428 0.472744 6.437345 0.025701 1.757330 0.902762
24 (JUMBO BAG PINK POLKADOT) (JUMBO STORAGE BAG SUKI) 0.069990 0.068356 0.030549 0.436474 6.385262 0.025764 1.653239 0.906860
25 (JUMBO STORAGE BAG SUKI) (JUMBO BAG PINK POLKADOT) 0.068356 0.069990 0.030549 0.446903 6.385262 0.025764 1.681459 0.905270
26 (LUNCH BAG SUKI DESIGN) (LUNCH BAG RED RETROSPOT) 0.061763 0.084205 0.031214 0.505387 6.001832 0.026013 1.851537 0.888245
27 (LUNCH BAG RED RETROSPOT) (LUNCH BAG SUKI DESIGN) 0.084205 0.061763 0.031214 0.370690 6.001832 0.026013 1.490898 0.910012
28 (LUNCH BAG BLACK SKULL.) (LUNCH BAG RED RETROSPOT) 0.073438 0.084205 0.036719 0.500000 5.937859 0.030535 1.831589 0.897500
29 (LUNCH BAG RED RETROSPOT) (LUNCH BAG BLACK SKULL.) 0.084205 0.073438 0.036719 0.436063 5.937859 0.030535 1.643025 0.908052
30 (JUMBO BAG RED RETROSPOT) (JUMBO BAG PINK POLKADOT) 0.116327 0.069990 0.047487 0.408216 5.832519 0.039345 1.571538 0.937618
31 (JUMBO BAG PINK POLKADOT) (JUMBO BAG RED RETROSPOT) 0.069990 0.116327 0.047487 0.678479 5.832519 0.039345 2.748413 0.890901
32 (LUNCH BAG SPACEBOY DESIGN) (LUNCH BAG RED RETROSPOT) 0.062791 0.084205 0.030307 0.482659 5.731922 0.025019 1.770195 0.880848
33 (LUNCH BAG RED RETROSPOT) (LUNCH BAG SPACEBOY DESIGN) 0.084205 0.062791 0.030307 0.359914 5.731922 0.025019 1.464192 0.901445
34 (LUNCH BAG CARS BLUE) (LUNCH BAG RED RETROSPOT) 0.064364 0.084205 0.030791 0.478383 5.681147 0.025371 1.755685 0.880662
35 (LUNCH BAG RED RETROSPOT) (LUNCH BAG CARS BLUE) 0.084205 0.064364 0.030791 0.365661 5.681147 0.025371 1.474978 0.899742
36 (JUMBO BAG RED RETROSPOT) (JUMBO BAG STRAWBERRY) 0.116327 0.047910 0.031275 0.268851 5.611581 0.025701 1.302183 0.929979
37 (JUMBO BAG STRAWBERRY) (JUMBO BAG RED RETROSPOT) 0.047910 0.116327 0.031275 0.652778 5.611581 0.025701 2.544979 0.863151
38 (JUMBO BAG BAROQUE BLACK WHITE) (JUMBO BAG RED RETROSPOT) 0.054685 0.116327 0.034481 0.630531 5.420337 0.028119 2.391738 0.862686
39 (JUMBO BAG RED RETROSPOT) (JUMBO BAG BAROQUE BLACK WHITE) 0.116327 0.054685 0.034481 0.296412 5.420337 0.028119 1.343563 0.922863
40 (JUMBO BAG RED RETROSPOT) (JUMBO STORAGE BAG SUKI) 0.116327 0.068356 0.042224 0.362975 5.310028 0.034272 1.462490 0.918526
41 (JUMBO STORAGE BAG SUKI) (JUMBO BAG RED RETROSPOT) 0.068356 0.116327 0.042224 0.617699 5.310028 0.034272 2.311460 0.871231
42 (JUMBO SHOPPER VINTAGE RED PAISLEY) (JUMBO BAG RED RETROSPOT) 0.068477 0.116327 0.039744 0.580389 4.989290 0.031778 2.105933 0.858348
43 (JUMBO BAG RED RETROSPOT) (JUMBO SHOPPER VINTAGE RED PAISLEY) 0.116327 0.068477 0.039744 0.341654 4.989290 0.031778 1.414943 0.904826
44 (JUMBO BAG APPLES) (JUMBO BAG RED RETROSPOT) 0.053536 0.116327 0.030730 0.574011 4.934467 0.024502 2.074405 0.842445
45 (JUMBO BAG RED RETROSPOT) (JUMBO BAG APPLES) 0.116327 0.053536 0.030730 0.264171 4.934467 0.024502 1.286255 0.902306
46 (JUMBO BAG RED RETROSPOT) (LUNCH BAG RED RETROSPOT) 0.116327 0.084205 0.032121 0.276131 3.279255 0.022326 1.265139 0.786550
47 (LUNCH BAG RED RETROSPOT) (JUMBO BAG RED RETROSPOT) 0.084205 0.116327 0.032121 0.381466 3.279255 0.022326 1.428656 0.758961
In [20]:
uk_rules.sort_values('confidence', ascending=False)
Out[20]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction zhangs_metric
0 (PINK REGENCY TEACUP AND SAUCER) (GREEN REGENCY TEACUP AND SAUCER) 0.042284 0.056500 0.034904 0.825465 14.610023 0.032515 5.405792 0.972683
3 (PINK REGENCY TEACUP AND SAUCER) (ROSES REGENCY TEACUP AND SAUCER) 0.042284 0.057710 0.033029 0.781116 13.535248 0.030589 4.304973 0.967008
7 (GREEN REGENCY TEACUP AND SAUCER) (ROSES REGENCY TEACUP AND SAUCER) 0.056500 0.057710 0.042405 0.750535 13.005345 0.039145 3.777249 0.978387
6 (ROSES REGENCY TEACUP AND SAUCER) (GREEN REGENCY TEACUP AND SAUCER) 0.057710 0.056500 0.042405 0.734801 13.005345 0.039145 3.557704 0.979644
4 (GARDENERS KNEELING PAD CUP OF TEA) (GARDENERS KNEELING PAD KEEP CALM) 0.045309 0.054262 0.032726 0.722296 13.311351 0.030268 3.405567 0.968770
31 (JUMBO BAG PINK POLKADOT) (JUMBO BAG RED RETROSPOT) 0.069990 0.116327 0.047487 0.678479 5.832519 0.039345 2.748413 0.890901
11 (PAPER CHAIN KIT VINTAGE CHRISTMAS) (PAPER CHAIN KIT 50'S CHRISTMAS) 0.048152 0.067631 0.032545 0.675879 9.993705 0.029288 2.876613 0.945463
37 (JUMBO BAG STRAWBERRY) (JUMBO BAG RED RETROSPOT) 0.047910 0.116327 0.031275 0.652778 5.611581 0.025701 2.544979 0.863151
9 (ALARM CLOCK BAKELIKE GREEN) (ALARM CLOCK BAKELIKE RED) 0.052689 0.056197 0.034057 0.646383 11.502008 0.031096 2.669000 0.963842
38 (JUMBO BAG BAROQUE BLACK WHITE) (JUMBO BAG RED RETROSPOT) 0.054685 0.116327 0.034481 0.630531 5.420337 0.028119 2.391738 0.862686
1 (GREEN REGENCY TEACUP AND SAUCER) (PINK REGENCY TEACUP AND SAUCER) 0.056500 0.042284 0.034904 0.617773 14.610023 0.032515 2.505621 0.987338
41 (JUMBO STORAGE BAG SUKI) (JUMBO BAG RED RETROSPOT) 0.068356 0.116327 0.042224 0.617699 5.310028 0.034272 2.311460 0.871231
8 (ALARM CLOCK BAKELIKE RED) (ALARM CLOCK BAKELIKE GREEN) 0.056197 0.052689 0.034057 0.606028 11.502008 0.031096 2.404514 0.967425
5 (GARDENERS KNEELING PAD KEEP CALM) (GARDENERS KNEELING PAD CUP OF TEA) 0.054262 0.045309 0.032726 0.603122 13.311351 0.030268 2.405500 0.977941
42 (JUMBO SHOPPER VINTAGE RED PAISLEY) (JUMBO BAG RED RETROSPOT) 0.068477 0.116327 0.039744 0.580389 4.989290 0.031778 2.105933 0.858348
44 (JUMBO BAG APPLES) (JUMBO BAG RED RETROSPOT) 0.053536 0.116327 0.030730 0.574011 4.934467 0.024502 2.074405 0.842445
2 (ROSES REGENCY TEACUP AND SAUCER) (PINK REGENCY TEACUP AND SAUCER) 0.057710 0.042284 0.033029 0.572327 13.535248 0.030589 2.239365 0.982838
13 (WOODEN FRAME ANTIQUE WHITE) (WOODEN PICTURE FRAME WHITE FINISH) 0.056863 0.064969 0.031940 0.561702 8.645715 0.028246 2.133324 0.937653
20 (LUNCH BAG PINK POLKADOT) (LUNCH BAG RED RETROSPOT) 0.060795 0.084205 0.033513 0.551244 6.546416 0.028394 2.040740 0.902087
14 (LUNCH BAG PINK POLKADOT) (LUNCH BAG BLACK SKULL.) 0.060795 0.073438 0.031577 0.519403 7.072694 0.027112 1.927940 0.914189
26 (LUNCH BAG SUKI DESIGN) (LUNCH BAG RED RETROSPOT) 0.061763 0.084205 0.031214 0.505387 6.001832 0.026013 1.851537 0.888245
28 (LUNCH BAG BLACK SKULL.) (LUNCH BAG RED RETROSPOT) 0.073438 0.084205 0.036719 0.500000 5.937859 0.030535 1.831589 0.897500
12 (WOODEN PICTURE FRAME WHITE FINISH) (WOODEN FRAME ANTIQUE WHITE) 0.064969 0.056863 0.031940 0.491620 8.645715 0.028246 1.855182 0.945782
17 (LUNCH BAG SUKI DESIGN) (LUNCH BAG BLACK SKULL.) 0.061763 0.073438 0.030186 0.488737 6.655110 0.025650 1.812299 0.905677
32 (LUNCH BAG SPACEBOY DESIGN) (LUNCH BAG RED RETROSPOT) 0.062791 0.084205 0.030307 0.482659 5.731922 0.025019 1.770195 0.880848
10 (PAPER CHAIN KIT 50'S CHRISTMAS) (PAPER CHAIN KIT VINTAGE CHRISTMAS) 0.067631 0.048152 0.032545 0.481216 9.993705 0.029288 1.834769 0.965215
34 (LUNCH BAG CARS BLUE) (LUNCH BAG RED RETROSPOT) 0.064364 0.084205 0.030791 0.478383 5.681147 0.025371 1.755685 0.880662
23 (LUNCH BAG CARS BLUE) (LUNCH BAG BLACK SKULL.) 0.064364 0.073438 0.030428 0.472744 6.437345 0.025701 1.757330 0.902762
18 (JUMBO STORAGE BAG SUKI) (JUMBO SHOPPER VINTAGE RED PAISLEY) 0.068356 0.068477 0.031033 0.453982 6.629666 0.026352 1.706030 0.911467
19 (JUMBO SHOPPER VINTAGE RED PAISLEY) (JUMBO STORAGE BAG SUKI) 0.068477 0.068356 0.031033 0.453180 6.629666 0.026352 1.703749 0.911586
25 (JUMBO STORAGE BAG SUKI) (JUMBO BAG PINK POLKADOT) 0.068356 0.069990 0.030549 0.446903 6.385262 0.025764 1.681459 0.905270
24 (JUMBO BAG PINK POLKADOT) (JUMBO STORAGE BAG SUKI) 0.069990 0.068356 0.030549 0.436474 6.385262 0.025764 1.653239 0.906860
29 (LUNCH BAG RED RETROSPOT) (LUNCH BAG BLACK SKULL.) 0.084205 0.073438 0.036719 0.436063 5.937859 0.030535 1.643025 0.908052
15 (LUNCH BAG BLACK SKULL.) (LUNCH BAG PINK POLKADOT) 0.073438 0.060795 0.031577 0.429984 7.072694 0.027112 1.647681 0.926663
22 (LUNCH BAG BLACK SKULL.) (LUNCH BAG CARS BLUE) 0.073438 0.064364 0.030428 0.414333 6.437345 0.025701 1.597556 0.911603
16 (LUNCH BAG BLACK SKULL.) (LUNCH BAG SUKI DESIGN) 0.073438 0.061763 0.030186 0.411038 6.655110 0.025650 1.593035 0.917088
30 (JUMBO BAG RED RETROSPOT) (JUMBO BAG PINK POLKADOT) 0.116327 0.069990 0.047487 0.408216 5.832519 0.039345 1.571538 0.937618
21 (LUNCH BAG RED RETROSPOT) (LUNCH BAG PINK POLKADOT) 0.084205 0.060795 0.033513 0.397989 6.546416 0.028394 1.560112 0.925147
47 (LUNCH BAG RED RETROSPOT) (JUMBO BAG RED RETROSPOT) 0.084205 0.116327 0.032121 0.381466 3.279255 0.022326 1.428656 0.758961
27 (LUNCH BAG RED RETROSPOT) (LUNCH BAG SUKI DESIGN) 0.084205 0.061763 0.031214 0.370690 6.001832 0.026013 1.490898 0.910012
35 (LUNCH BAG RED RETROSPOT) (LUNCH BAG CARS BLUE) 0.084205 0.064364 0.030791 0.365661 5.681147 0.025371 1.474978 0.899742
40 (JUMBO BAG RED RETROSPOT) (JUMBO STORAGE BAG SUKI) 0.116327 0.068356 0.042224 0.362975 5.310028 0.034272 1.462490 0.918526
33 (LUNCH BAG RED RETROSPOT) (LUNCH BAG SPACEBOY DESIGN) 0.084205 0.062791 0.030307 0.359914 5.731922 0.025019 1.464192 0.901445
43 (JUMBO BAG RED RETROSPOT) (JUMBO SHOPPER VINTAGE RED PAISLEY) 0.116327 0.068477 0.039744 0.341654 4.989290 0.031778 1.414943 0.904826
39 (JUMBO BAG RED RETROSPOT) (JUMBO BAG BAROQUE BLACK WHITE) 0.116327 0.054685 0.034481 0.296412 5.420337 0.028119 1.343563 0.922863
46 (JUMBO BAG RED RETROSPOT) (LUNCH BAG RED RETROSPOT) 0.116327 0.084205 0.032121 0.276131 3.279255 0.022326 1.265139 0.786550
36 (JUMBO BAG RED RETROSPOT) (JUMBO BAG STRAWBERRY) 0.116327 0.047910 0.031275 0.268851 5.611581 0.025701 1.302183 0.929979
45 (JUMBO BAG RED RETROSPOT) (JUMBO BAG APPLES) 0.116327 0.053536 0.030730 0.264171 4.934467 0.024502 1.286255 0.902306
In [21]:
uk_rules[ (uk_rules['lift'] >= 3) & (uk_rules['confidence'] >= 0.5) & (uk_rules['support'] >= 0.03)]
Out[21]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction zhangs_metric
0 (PINK REGENCY TEACUP AND SAUCER) (GREEN REGENCY TEACUP AND SAUCER) 0.042284 0.056500 0.034904 0.825465 14.610023 0.032515 5.405792 0.972683
1 (GREEN REGENCY TEACUP AND SAUCER) (PINK REGENCY TEACUP AND SAUCER) 0.056500 0.042284 0.034904 0.617773 14.610023 0.032515 2.505621 0.987338
2 (ROSES REGENCY TEACUP AND SAUCER) (PINK REGENCY TEACUP AND SAUCER) 0.057710 0.042284 0.033029 0.572327 13.535248 0.030589 2.239365 0.982838
3 (PINK REGENCY TEACUP AND SAUCER) (ROSES REGENCY TEACUP AND SAUCER) 0.042284 0.057710 0.033029 0.781116 13.535248 0.030589 4.304973 0.967008
4 (GARDENERS KNEELING PAD CUP OF TEA) (GARDENERS KNEELING PAD KEEP CALM) 0.045309 0.054262 0.032726 0.722296 13.311351 0.030268 3.405567 0.968770
5 (GARDENERS KNEELING PAD KEEP CALM) (GARDENERS KNEELING PAD CUP OF TEA) 0.054262 0.045309 0.032726 0.603122 13.311351 0.030268 2.405500 0.977941
6 (ROSES REGENCY TEACUP AND SAUCER) (GREEN REGENCY TEACUP AND SAUCER) 0.057710 0.056500 0.042405 0.734801 13.005345 0.039145 3.557704 0.979644
7 (GREEN REGENCY TEACUP AND SAUCER) (ROSES REGENCY TEACUP AND SAUCER) 0.056500 0.057710 0.042405 0.750535 13.005345 0.039145 3.777249 0.978387
8 (ALARM CLOCK BAKELIKE RED) (ALARM CLOCK BAKELIKE GREEN) 0.056197 0.052689 0.034057 0.606028 11.502008 0.031096 2.404514 0.967425
9 (ALARM CLOCK BAKELIKE GREEN) (ALARM CLOCK BAKELIKE RED) 0.052689 0.056197 0.034057 0.646383 11.502008 0.031096 2.669000 0.963842
11 (PAPER CHAIN KIT VINTAGE CHRISTMAS) (PAPER CHAIN KIT 50'S CHRISTMAS) 0.048152 0.067631 0.032545 0.675879 9.993705 0.029288 2.876613 0.945463
13 (WOODEN FRAME ANTIQUE WHITE) (WOODEN PICTURE FRAME WHITE FINISH) 0.056863 0.064969 0.031940 0.561702 8.645715 0.028246 2.133324 0.937653
14 (LUNCH BAG PINK POLKADOT) (LUNCH BAG BLACK SKULL.) 0.060795 0.073438 0.031577 0.519403 7.072694 0.027112 1.927940 0.914189
20 (LUNCH BAG PINK POLKADOT) (LUNCH BAG RED RETROSPOT) 0.060795 0.084205 0.033513 0.551244 6.546416 0.028394 2.040740 0.902087
26 (LUNCH BAG SUKI DESIGN) (LUNCH BAG RED RETROSPOT) 0.061763 0.084205 0.031214 0.505387 6.001832 0.026013 1.851537 0.888245
28 (LUNCH BAG BLACK SKULL.) (LUNCH BAG RED RETROSPOT) 0.073438 0.084205 0.036719 0.500000 5.937859 0.030535 1.831589 0.897500
31 (JUMBO BAG PINK POLKADOT) (JUMBO BAG RED RETROSPOT) 0.069990 0.116327 0.047487 0.678479 5.832519 0.039345 2.748413 0.890901
37 (JUMBO BAG STRAWBERRY) (JUMBO BAG RED RETROSPOT) 0.047910 0.116327 0.031275 0.652778 5.611581 0.025701 2.544979 0.863151
38 (JUMBO BAG BAROQUE BLACK WHITE) (JUMBO BAG RED RETROSPOT) 0.054685 0.116327 0.034481 0.630531 5.420337 0.028119 2.391738 0.862686
41 (JUMBO STORAGE BAG SUKI) (JUMBO BAG RED RETROSPOT) 0.068356 0.116327 0.042224 0.617699 5.310028 0.034272 2.311460 0.871231
42 (JUMBO SHOPPER VINTAGE RED PAISLEY) (JUMBO BAG RED RETROSPOT) 0.068477 0.116327 0.039744 0.580389 4.989290 0.031778 2.105933 0.858348
44 (JUMBO BAG APPLES) (JUMBO BAG RED RETROSPOT) 0.053536 0.116327 0.030730 0.574011 4.934467 0.024502 2.074405 0.842445

UK Basket Predicted Visualization

In [22]:
top_rules = uk_rules.nlargest(10, 'lift') 

rule_labels = [f"{str(antecedent)} -> {str(consequent)}" for antecedent, consequent in zip(top_rules['antecedents'], top_rules['consequents'])]
plt.figure(figsize=(12, 12))
colors = plt.cm.Paired(range(len(top_rules)))
patches, texts, autotexts = plt.pie(top_rules['lift'], autopct='%1.1f%%', startangle=140, colors=colors, textprops={'fontsize': 14})
plt.legend(patches, rule_labels, title='Rules', loc='center left', bbox_to_anchor=(1, 0.5), prop={'size': 16})
plt.axis('equal')
plt.title('Top 10 Association Rules by Lift in UK')
plt.show()
In [23]:
top_rules = uk_rules.nlargest(10, 'confidence')
rule_labels = [f"{str(antecedent)} -> {str(consequent)}" for antecedent, consequent in zip(top_rules['antecedents'], top_rules['consequents'])]
plt.figure(figsize=(14, 12))
colors = plt.cm.Paired(range(len(top_rules)))
patches, texts, autotexts = plt.pie(top_rules['confidence'], autopct='%1.1f%%', startangle=140, colors=colors, textprops={'fontsize': 14})
plt.legend(patches, rule_labels, title='Rules', loc='center left', bbox_to_anchor=(1, 0.5), prop={'size': 16})
plt.axis('equal')
plt.title('Top 10 Association Rules by confidence in UK')
plt.show()
In [24]:
top_rules = uk_rules.nlargest(10, 'support')
rule_labels = [f"{str(antecedent)} -> {str(consequent)}" for antecedent, consequent in zip(top_rules['antecedents'], top_rules['consequents'])]
plt.figure(figsize=(14, 12))
colors = plt.cm.Paired(range(len(top_rules)))
patches, texts, autotexts = plt.pie(top_rules['support'], autopct='%1.1f%%', startangle=140, colors=colors, textprops={'fontsize': 14})
plt.legend(patches, rule_labels, title='Rules', loc='center left', bbox_to_anchor=(1, 0.5), prop={'size': 16})
plt.axis('equal')
plt.title('Top 10 Association Rules by support in UK')
plt.show()
In [25]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

fig = make_subplots(rows=1, cols=1, specs=[[{'type': 'scatter3d'}]])

scatter = go.Scatter3d(
    x=uk_rules['support'],
    y=uk_rules['confidence'],
    z=uk_rules['lift'],
    mode='markers',
    marker=dict(color=uk_rules['support'], size=8, colorscale='Viridis', opacity=0.8),
    text=f"Support: {uk_rules['support']}, Confidence: {uk_rules['confidence']}, Lift: {uk_rules['lift']}"
)

fig.add_trace(scatter)
fig.update_layout(scene=dict(xaxis_title='Support', yaxis_title='Confidence', zaxis_title='Lift'))
fig.update_layout(title='Interactive 3D Scatter Plot of Support, Confidence, and Lift')
fig.show()
D:\Anaconda\Lib\site-packages\plotly\io\_renderers.py:395: DeprecationWarning:

distutils Version classes are deprecated. Use packaging.version instead.

D:\Anaconda\Lib\site-packages\plotly\io\_renderers.py:395: DeprecationWarning:

distutils Version classes are deprecated. Use packaging.version instead.

In [26]:
top_rules = uk_rules[
    (uk_rules['lift'] >= 3) &
    (uk_rules['confidence'] >= 0.5) &
    (uk_rules['support'] >= 0.03)
].nlargest(10, 'confidence')

top_rules_pivot = top_rules.pivot(index='antecedents', columns='consequents', values='confidence')

plt.figure(figsize=(14, 12))
sns.heatmap(top_rules_pivot, annot=True, cmap='YlGnBu', fmt='.2f', cbar_kws={'label': 'Confidence'})
plt.title('Top 10 Association Rules Heatmap based on Confidence')
plt.show()

GERMANY Basket

Germany Basket Generated Rules

In [27]:
germany_rules.head(100)
Out[27]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction zhangs_metric
0 (SPACEBOY CHILDRENS CUP) (SPACEBOY CHILDRENS BOWL) 0.046838 0.044496 0.039813 0.850000 19.102632 0.037729 6.370023 0.994219
1 (SPACEBOY CHILDRENS BOWL) (SPACEBOY CHILDRENS CUP) 0.044496 0.046838 0.039813 0.894737 19.102632 0.037729 9.055035 0.991782
2 (SWEETHEART CERAMIC TRINKET BOX) (STRAWBERRY CERAMIC TRINKET BOX) 0.035129 0.056206 0.030445 0.866667 15.419444 0.028471 7.078454 0.969193
3 (STRAWBERRY CERAMIC TRINKET BOX) (SWEETHEART CERAMIC TRINKET BOX) 0.056206 0.035129 0.030445 0.541667 15.419444 0.028471 2.105174 0.990838
4 (SET OF 12 FAIRY CAKE BAKING CASES) (SET OF 12 MINI LOAF BAKING CASES) 0.044496 0.044496 0.030445 0.684211 15.376731 0.028465 3.025761 0.978507
... ... ... ... ... ... ... ... ... ... ...
95 (ROUND SNACK BOXES SET OF4 WOODLAND) (ROUND SNACK BOXES SET OF 4 FRUITS, SPACEBOY L... 0.262295 0.042155 0.039813 0.151786 3.600694 0.028756 1.129249 0.979085
96 (WOODLAND CHARLOTTE BAG) (JUMBO BAG WOODLAND ANIMALS) 0.135831 0.105386 0.051522 0.379310 3.599234 0.037207 1.441322 0.835674
97 (JUMBO BAG WOODLAND ANIMALS) (WOODLAND CHARLOTTE BAG) 0.105386 0.135831 0.051522 0.488889 3.599234 0.037207 1.690765 0.807235
98 (ROUND SNACK BOXES SET OF 4 FRUITS, CHARLOTTE ... (ROUND SNACK BOXES SET OF4 WOODLAND) 0.035129 0.262295 0.032787 0.933333 3.558333 0.023573 11.065574 0.745146
99 (ROUND SNACK BOXES SET OF4 WOODLAND) (ROUND SNACK BOXES SET OF 4 FRUITS, CHARLOTTE ... 0.262295 0.035129 0.032787 0.125000 3.558333 0.023573 1.102710 0.974603

100 rows × 10 columns

In [28]:
germany_rules.sort_values('confidence', ascending=False)
Out[28]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction zhangs_metric
34 (ROUND SNACK BOXES SET OF4 WOODLAND, RED RETRO... (WOODLAND CHARLOTTE BAG) 0.032787 0.135831 0.032787 1.000000 7.362069 0.028333 inf 0.893462
94 (ROUND SNACK BOXES SET OF 4 FRUITS, SPACEBOY L... (ROUND SNACK BOXES SET OF4 WOODLAND) 0.042155 0.262295 0.039813 0.944444 3.600694 0.028756 13.278689 0.754063
21 (JUMBO BAG PINK POLKADOT) (JUMBO BAG RED RETROSPOT) 0.037471 0.081967 0.035129 0.937500 11.437500 0.032057 14.688525 0.948094
13 (JAM JAR WITH GREEN LID) (JAM JAR WITH PINK LID) 0.037471 0.067916 0.035129 0.937500 13.803879 0.032584 14.913349 0.963666
98 (ROUND SNACK BOXES SET OF 4 FRUITS, CHARLOTTE ... (ROUND SNACK BOXES SET OF4 WOODLAND) 0.035129 0.262295 0.032787 0.933333 3.558333 0.023573 11.065574 0.745146
... ... ... ... ... ... ... ... ... ... ...
222 (ROUND SNACK BOXES SET OF4 WOODLAND) (JAM JAR WITH PINK LID) 0.262295 0.067916 0.032787 0.125000 1.840517 0.014973 1.065239 0.619048
154 (ROUND SNACK BOXES SET OF4 WOODLAND) (SET OF 2 TEA TOWELS APPLE AND PEARS) 0.262295 0.051522 0.032787 0.125000 2.426136 0.019273 1.083975 0.796825
165 (ROUND SNACK BOXES SET OF4 WOODLAND) (RED HARMONICA IN BOX) 0.262295 0.049180 0.030445 0.116071 2.360119 0.017545 1.075675 0.781197
106 (ROUND SNACK BOXES SET OF4 WOODLAND) (ROUND SNACK BOXES SET OF 4 FRUITS, LUNCH BOX ... 0.262295 0.035129 0.030445 0.116071 3.304167 0.021231 1.091571 0.945299
217 (ROUND SNACK BOXES SET OF4 WOODLAND) (RETROSPOT TEA SET CERAMIC 11 PC) 0.262295 0.060890 0.030445 0.116071 1.906250 0.014474 1.062428 0.644444

244 rows × 10 columns

In [29]:
germany_rules[(germany_rules['lift'] >= 3) & (germany_rules['confidence'] >= 0.5) & (germany_rules['support'] >= 0.03)]
Out[29]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction zhangs_metric
0 (SPACEBOY CHILDRENS CUP) (SPACEBOY CHILDRENS BOWL) 0.046838 0.044496 0.039813 0.850000 19.102632 0.037729 6.370023 0.994219
1 (SPACEBOY CHILDRENS BOWL) (SPACEBOY CHILDRENS CUP) 0.044496 0.046838 0.039813 0.894737 19.102632 0.037729 9.055035 0.991782
2 (SWEETHEART CERAMIC TRINKET BOX) (STRAWBERRY CERAMIC TRINKET BOX) 0.035129 0.056206 0.030445 0.866667 15.419444 0.028471 7.078454 0.969193
3 (STRAWBERRY CERAMIC TRINKET BOX) (SWEETHEART CERAMIC TRINKET BOX) 0.056206 0.035129 0.030445 0.541667 15.419444 0.028471 2.105174 0.990838
4 (SET OF 12 FAIRY CAKE BAKING CASES) (SET OF 12 MINI LOAF BAKING CASES) 0.044496 0.044496 0.030445 0.684211 15.376731 0.028465 3.025761 0.978507
... ... ... ... ... ... ... ... ... ... ...
116 (ROUND SNACK BOXES SET OF4 WOODLAND) (ROUND SNACK BOXES SET OF 4 FRUITS) 0.262295 0.168618 0.140515 0.535714 3.177083 0.096287 1.790668 0.928889
117 (ROUND SNACK BOXES SET OF 4 FRUITS) (ROUND SNACK BOXES SET OF4 WOODLAND) 0.168618 0.262295 0.140515 0.833333 3.177083 0.096287 4.426230 0.824225
119 (ROUND SNACK BOXES SET OF4 WOODLAND, SPACEBOY ... (ROUND SNACK BOXES SET OF 4 FRUITS) 0.074941 0.168618 0.039813 0.531250 3.150608 0.027176 1.773614 0.737900
121 (ROUND SNACK BOXES SET OF 4 FRUITS, PLASTERS I... (ROUND SNACK BOXES SET OF4 WOODLAND) 0.053864 0.262295 0.044496 0.826087 3.149457 0.030368 4.241803 0.721339
125 (LUNCH BOX WITH CUTLERY RETROSPOT) (ROUND SNACK BOXES SET OF 4 FRUITS) 0.067916 0.168618 0.035129 0.517241 3.067529 0.023677 1.722148 0.723116

67 rows × 10 columns

Germany Basket Predicted Visualization

In [30]:
top_rules = germany_rules.nlargest(10, 'lift')

rule_labels = [f"{str(antecedent)} -> {str(consequent)}" for antecedent, consequent in zip(top_rules['antecedents'], top_rules['consequents'])]
plt.figure(figsize=(12, 12))
colors = plt.cm.Paired(range(len(top_rules)))
patches, texts, autotexts = plt.pie(top_rules['lift'], autopct='%1.1f%%', startangle=140, colors=colors, textprops={'fontsize': 14})
plt.legend(patches, rule_labels, title='Rules', loc='center left', bbox_to_anchor=(1, 0.5), prop={'size': 16})
plt.axis('equal')
plt.title('Top 10 Association Rules by Lift in Germany')
plt.show()
In [31]:
top_rules = germany_rules.nlargest(10, 'confidence')
rule_labels = [f"{str(antecedent)} -> {str(consequent)}" for antecedent, consequent in zip(top_rules['antecedents'], top_rules['consequents'])]
plt.figure(figsize=(14, 12))
colors = plt.cm.Paired(range(len(top_rules)))
patches, texts, autotexts = plt.pie(top_rules['confidence'], autopct='%1.1f%%', startangle=140, colors=colors, textprops={'fontsize': 14})
plt.legend(patches, rule_labels, title='Rules', loc='center left', bbox_to_anchor=(1, 0.5), prop={'size': 16})
plt.axis('equal')
plt.title('Top 10 Association Rules by confidence in Germany')
plt.show()
In [32]:
top_rules = germany_rules.nlargest(10, 'support')
rule_labels = [f"{str(antecedent)} -> {str(consequent)}" for antecedent, consequent in zip(top_rules['antecedents'], top_rules['consequents'])]
plt.figure(figsize=(14, 12))
colors = plt.cm.Paired(range(len(top_rules)))
patches, texts, autotexts = plt.pie(top_rules['support'], autopct='%1.1f%%', startangle=140, colors=colors, textprops={'fontsize': 14})
plt.legend(patches, rule_labels, title='Rules', loc='center left', bbox_to_anchor=(1, 0.5), prop={'size': 16})
plt.axis('equal')
plt.title('Top 10 Association Rules by support in Germany')
plt.show()
In [33]:
fig = make_subplots(rows=1, cols=1, specs=[[{'type': 'scatter3d'}]])

scatter = go.Scatter3d(
    x=germany_rules['support'],
    y=germany_rules['confidence'],
    z=germany_rules['lift'],
    mode='markers',
    marker=dict(color=germany_rules['support'], size=8, colorscale='Viridis', opacity=0.8),
    text=f"Support: {germany_rules['support']}, Confidence: {germany_rules['confidence']}, Lift: {germany_rules['lift']}"
)

fig.add_trace(scatter)
fig.update_layout(scene=dict(xaxis_title='Support', yaxis_title='Confidence', zaxis_title='Lift'))
fig.update_layout(title='Interactive 3D Scatter Plot of Support, Confidence, and Lift')
fig.show()
D:\Anaconda\Lib\site-packages\plotly\io\_renderers.py:395: DeprecationWarning:

distutils Version classes are deprecated. Use packaging.version instead.

D:\Anaconda\Lib\site-packages\plotly\io\_renderers.py:395: DeprecationWarning:

distutils Version classes are deprecated. Use packaging.version instead.

In [34]:
top_rules = germany_rules[
    (germany_rules['lift'] >= 3) &
    (germany_rules['confidence'] >= 0.5) &
    (germany_rules['support'] >= 0.03)
].nlargest(10, 'confidence')

top_rules_pivot = top_rules.pivot(index='antecedents', columns='consequents', values='confidence')

plt.figure(figsize=(14, 12))
sns.heatmap(top_rules_pivot, annot=True, cmap='YlGnBu', fmt='.2f', cbar_kws={'label': 'confidence'})
plt.title('Top 10 Association Rules Heatmap based on Confidence')
plt.show()

FRANCE Basket

FRANCE Basket Generated Rules

In [35]:
france_rules.head(100)
Out[35]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction zhangs_metric
0 (SPACEBOY CHILDRENS CUP, DOLLY GIRL CHILDRENS ... (SPACEBOY CHILDRENS BOWL, DOLLY GIRL CHILDRENS... 0.030055 0.035519 0.030055 1.000000 28.153846 0.028987 inf 0.994366
1 (SPACEBOY CHILDRENS BOWL, DOLLY GIRL CHILDRENS... (SPACEBOY CHILDRENS CUP, DOLLY GIRL CHILDRENS ... 0.035519 0.030055 0.030055 0.846154 28.153846 0.028987 6.304645 1.000000
2 (PACK OF 6 SKULL PAPER PLATES, SET/6 RED SPOTT... (PACK OF 20 SKULL PAPER NAPKINS, SET/6 RED SPO... 0.035519 0.032787 0.030055 0.846154 25.807692 0.028890 6.286885 0.996652
3 (PACK OF 6 SKULL PAPER PLATES, SET/6 RED SPOTT... (PACK OF 20 SKULL PAPER NAPKINS, SET/6 RED SPO... 0.035519 0.032787 0.030055 0.846154 25.807692 0.028890 6.286885 0.996652
4 (PACK OF 20 SKULL PAPER NAPKINS, SET/6 RED SPO... (PACK OF 6 SKULL PAPER PLATES, SET/6 RED SPOTT... 0.032787 0.035519 0.030055 0.916667 25.807692 0.028890 11.573770 0.993837
... ... ... ... ... ... ... ... ... ... ...
95 (SET/6 RED SPOTTY PAPER PLATES, PACK OF 6 SKUL... (PACK OF 6 SKULL PAPER PLATES, PACK OF 20 SKUL... 0.040984 0.035519 0.030055 0.733333 20.646154 0.028599 3.616803 0.992230
96 (DOLLY GIRL CHILDRENS BOWL, DOLLY GIRL CHILDRE... (SPACEBOY CHILDRENS CUP) 0.040984 0.035519 0.030055 0.733333 20.646154 0.028599 3.616803 0.992230
97 (SPACEBOY CHILDRENS CUP) (DOLLY GIRL CHILDRENS BOWL, DOLLY GIRL CHILDRE... 0.035519 0.040984 0.030055 0.846154 20.646154 0.028599 6.233607 0.986608
98 (PACK OF 6 SKULL PAPER PLATES, PACK OF 20 SKUL... (SET/6 RED SPOTTY PAPER PLATES, PACK OF 6 SKUL... 0.035519 0.040984 0.030055 0.846154 20.646154 0.028599 6.233607 0.986608
99 (SET/6 RED SPOTTY PAPER CUPS, PACK OF 20 SKULL... (SET/6 RED SPOTTY PAPER PLATES, PACK OF 6 SKUL... 0.040984 0.035519 0.030055 0.733333 20.646154 0.028599 3.616803 0.992230

100 rows × 10 columns

In [36]:
france_rules[ (france_rules['lift'] >= 3) & (france_rules['confidence'] >= 0.5) & (france_rules['support'] >= 0.03)] 
Out[36]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction zhangs_metric
0 (SPACEBOY CHILDRENS CUP, DOLLY GIRL CHILDRENS ... (SPACEBOY CHILDRENS BOWL, DOLLY GIRL CHILDRENS... 0.030055 0.035519 0.030055 1.000000 28.153846 0.028987 inf 0.994366
1 (SPACEBOY CHILDRENS BOWL, DOLLY GIRL CHILDRENS... (SPACEBOY CHILDRENS CUP, DOLLY GIRL CHILDRENS ... 0.035519 0.030055 0.030055 0.846154 28.153846 0.028987 6.304645 1.000000
2 (PACK OF 6 SKULL PAPER PLATES, SET/6 RED SPOTT... (PACK OF 20 SKULL PAPER NAPKINS, SET/6 RED SPO... 0.035519 0.032787 0.030055 0.846154 25.807692 0.028890 6.286885 0.996652
3 (PACK OF 6 SKULL PAPER PLATES, SET/6 RED SPOTT... (PACK OF 20 SKULL PAPER NAPKINS, SET/6 RED SPO... 0.035519 0.032787 0.030055 0.846154 25.807692 0.028890 6.286885 0.996652
4 (PACK OF 20 SKULL PAPER NAPKINS, SET/6 RED SPO... (PACK OF 6 SKULL PAPER PLATES, SET/6 RED SPOTT... 0.032787 0.035519 0.030055 0.916667 25.807692 0.028890 11.573770 0.993837
... ... ... ... ... ... ... ... ... ... ...
1373 (PLASTERS IN TIN SPACEBOY, LUNCH BAG SPACEBOY ... (ROUND SNACK BOXES SET OF4 WOODLAND) 0.057377 0.169399 0.030055 0.523810 3.092166 0.020335 1.744262 0.717787
1383 (RED RETROSPOT PICNIC BAG) (LUNCH BAG RED RETROSPOT) 0.076503 0.163934 0.038251 0.500000 3.050000 0.025710 1.672131 0.727811
1385 (ALARM CLOCK BAKELIKE RED, ALARM CLOCK BAKELIK... (ROUND SNACK BOXES SET OF4 WOODLAND) 0.084699 0.169399 0.043716 0.516129 3.046826 0.029368 1.716576 0.733955
1396 (PLASTERS IN TIN SKULLS) (PLASTERS IN TIN CIRCUS PARADE) 0.060109 0.180328 0.032787 0.545455 3.024793 0.021948 1.803279 0.712209
1400 (RED TOADSTOOL LED NIGHT LIGHT, PLASTERS IN TI... (PLASTERS IN TIN WOODLAND ANIMALS) 0.054645 0.183060 0.030055 0.550000 3.004478 0.020051 1.815422 0.705728

852 rows × 10 columns

In [37]:
france_rules.sort_values('confidence', ascending=False)
Out[37]:
antecedents consequents antecedent support consequent support support confidence lift leverage conviction zhangs_metric
0 (SPACEBOY CHILDRENS CUP, DOLLY GIRL CHILDRENS ... (SPACEBOY CHILDRENS BOWL, DOLLY GIRL CHILDRENS... 0.030055 0.035519 0.030055 1.000000 28.153846 0.028987 inf 0.994366
659 (PLASTERS IN TIN WOODLAND ANIMALS, ALARM CLOCK... (PLASTERS IN TIN SPACEBOY) 0.035519 0.147541 0.035519 1.000000 6.777778 0.030279 inf 0.883853
252 (SET/6 RED SPOTTY PAPER CUPS, SET/6 RED SPOTTY... (PACK OF 6 SKULL PAPER PLATES) 0.030055 0.060109 0.030055 1.000000 16.636364 0.028248 inf 0.969014
254 (PACK OF 20 SKULL PAPER NAPKINS, SET/6 RED SPO... (PACK OF 6 SKULL PAPER PLATES) 0.032787 0.060109 0.032787 1.000000 16.636364 0.030816 inf 0.971751
256 (SET/6 RED SPOTTY PAPER PLATES, PACK OF 20 SKU... (PACK OF 6 SKULL PAPER PLATES) 0.032787 0.060109 0.032787 1.000000 16.636364 0.030816 inf 0.971751
... ... ... ... ... ... ... ... ... ... ...
1091 (PLASTERS IN TIN WOODLAND ANIMALS) (PLASTERS IN TIN SPACEBOY, ALARM CLOCK BAKELIK... 0.183060 0.035519 0.030055 0.164179 4.622273 0.023553 1.153932 0.959258
1135 (PLASTERS IN TIN WOODLAND ANIMALS) (PLASTERS IN TIN SPACEBOY, LUNCH BOX WITH CUTL... 0.183060 0.038251 0.030055 0.164179 4.292111 0.023052 1.150664 0.938887
1801 (RED TOADSTOOL LED NIGHT LIGHT) (LUNCH BAG SPACEBOY DESIGN) 0.191257 0.128415 0.030055 0.157143 1.223708 0.005494 1.034084 0.226044
1757 (RED TOADSTOOL LED NIGHT LIGHT) (PLASTERS IN TIN CIRCUS PARADE, PLASTERS IN TI... 0.191257 0.109290 0.030055 0.157143 1.437857 0.009152 1.056775 0.376536
1591 (RABBIT NIGHT LIGHT) (JUMBO BAG APPLES) 0.196721 0.071038 0.030055 0.152778 2.150641 0.016080 1.096479 0.666048

1818 rows × 10 columns

France Basket Predicted Visualization

In [38]:
top_rules = france_rules.nlargest(10, 'lift') 
rule_labels = [f"{str(antecedent)} -> {str(consequent)}" for antecedent, consequent in zip(top_rules['antecedents'], top_rules['consequents'])]
plt.figure(figsize=(12, 12))
colors = plt.cm.Paired(range(len(top_rules)))
patches, texts, autotexts = plt.pie(top_rules['lift'], autopct='%1.1f%%', startangle=140, colors=colors, textprops={'fontsize': 24})
plt.legend(patches, rule_labels, title='Rules', loc='center left', bbox_to_anchor=(1, 0.5), prop={'size': 18})
plt.axis('equal')
plt.title('Top 10 Association Rules by Lift in France')
plt.show()
In [39]:
top_rules = france_rules.nlargest(10, 'confidence')
rule_labels = [f"{str(antecedent)} -> {str(consequent)}" for antecedent, consequent in zip(top_rules['antecedents'], top_rules['consequents'])]
plt.figure(figsize=(12, 12))
colors = plt.cm.Paired(range(len(top_rules)))
patches, texts, autotexts = plt.pie(top_rules['confidence'], autopct='%1.1f%%', startangle=140, colors=colors, textprops={'fontsize': 24})
plt.legend(patches, rule_labels, title='Rules', loc='center left', bbox_to_anchor=(1, 0.5), prop={'size': 18})
plt.axis('equal')
plt.title('Top 10 Association Rules by Confidence in France')
plt.show()
In [40]:
top_rules = france_rules.nlargest(10, 'support')
rule_labels = [f"{str(antecedent)} -> {str(consequent)}" for antecedent, consequent in zip(top_rules['antecedents'], top_rules['consequents'])]
plt.figure(figsize=(12, 12))
colors = plt.cm.Paired(range(len(top_rules)))
patches, texts, autotexts = plt.pie(top_rules['support'], autopct='%1.1f%%', startangle=140, colors=colors, textprops={'fontsize': 24})
plt.legend(patches, rule_labels, title='Rules', loc='center left', bbox_to_anchor=(1, 0.5), prop={'size': 18})
plt.axis('equal')
plt.title('Top 10 Association Rules by Support in France')
plt.show()
In [41]:
fig = make_subplots(rows=1, cols=1, specs=[[{'type': 'scatter3d'}]])

scatter = go.Scatter3d(
    x=france_rules['support'],
    y=france_rules['confidence'],
    z=france_rules['lift'],
    mode='markers',
    marker=dict(color=france_rules['support'], size=8, colorscale='Viridis', opacity=0.8),
    text=f"Support: {france_rules['support']}, Confidence: {france_rules['confidence']}, Lift: {france_rules['lift']}"
)

fig.add_trace(scatter)
fig.update_layout(scene=dict(xaxis_title='Support', yaxis_title='Confidence', zaxis_title='Lift'))
fig.update_layout(title='Interactive 3D Scatter Plot of Support, Confidence, and Lift')
fig.show()
D:\Anaconda\Lib\site-packages\plotly\io\_renderers.py:395: DeprecationWarning:

distutils Version classes are deprecated. Use packaging.version instead.

D:\Anaconda\Lib\site-packages\plotly\io\_renderers.py:395: DeprecationWarning:

distutils Version classes are deprecated. Use packaging.version instead.

In [42]:
top_rules = france_rules[
    (france_rules['lift'] >= 3) &
    (france_rules['confidence'] >= 0.5) &
    (france_rules['support'] >= 0.03)
].nlargest(10, 'confidence')

top_rules_pivot = top_rules.pivot(index='antecedents', columns='consequents', values='confidence')

plt.figure(figsize=(14, 12))
sns.heatmap(top_rules_pivot, annot=True, cmap='YlGnBu', fmt='.2f', cbar_kws={'label': 'confidence'}, annot_kws={'size': 16})

plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.title('Top 10 Association Rules Heatmap based on Confidence', fontsize=24)

plt.show()